#!/usr/bin/Rscript
##########################################################################################
# Social Media and Policy Responses to the COVID-19 Pandemic in Switzerland
##########################################################################################
# Description:
##########################################################################################
# Figure 2
##########################################################################################
# Contents
##########################################################################################
# 1) Dependencies
# 2) Load all necessary Things
# 3) Load curated data
# 4) Plots and Tables 
# 4.1) Weekly topic shares in tweets
# 4.2) Weekly topic shares in newspaper articles (SMD)
# 4.3) Combine data and annotate events
# 4.4) Figure 2: build, arrange and save the plots
##########################################################################################
# 1) Dependencies
##########################################################################################
library(dplyr)
library(tidyr)
library(tidyverse)
library(data.table)
library(readr)
library(lubridate)
library(ggplot2)
library(graphlayouts)
library(scales)
library(purrr)
library(magrittr)
library(sysfonts)
library(cowplot)
##########################################################################################
# 2) Load all necessary Things
##########################################################################################
# NOTE: this clears every object from the global environment before the
# script runs.
rm(list = ls())

# - set dir: make the directory that contains this script the working
#   directory, so that the relative paths used below resolve.
args <- commandArgs()

# The script location is taken from the `--file=<path>` command-line argument.
scriptName <- args[startsWith(args, "--file=")]

if (length(scriptName) == 0) {
  # No `--file=` argument: fall back to the file open in the RStudio editor.
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  scriptName <- sub("^--file=", "", scriptName[1])
}

# dirname() also copes with a bare file name (it returns "."), where manual
# splitting on the last path separator produced NA and made setwd() fail.
# The trailing separator is kept so `pathName` can still be pasted to a file.
pathName <- paste0(dirname(scriptName), "/")

setwd(pathName)
parent_path <- getwd()

# - load fonts used in plots: each Google font is registered under its own
#   family name
invisible(lapply(
  c("Montserrat", "Roboto"),
  function(font_family) font_add_google(font_family, font_family)
))

# load ggplot theme of the DigDemLab; messages and warnings raised while
# sourcing the theme file are silenced
suppressMessages(suppressWarnings(source("ggplot_theme_ddl.R")))
# - model-configurations to consider: one entry per language selection
langs <- list(
  c("de"),
  c("fr"),
  c("it"),
  c("en"),
  c("de", "fr", "it"),
  c("de", "fr", "it", "en"),
  c("all")
)

# Short label per configuration, in the same order as `langs`.
langusges_l <- c("de", "fr", "it", "en", "de_fr_it", "de_fr_it_en", "alle")

# Human-readable suffix per configuration. The entries follow the order of
# `langs` / `langusges_l` (de, fr, it, en, ...) so that position i of all three
# objects refers to the same configuration.
# German variants, previously assigned and immediately overwritten:
#   " auf Deutsch", " auf Französisch", " auf Italienisch", " auf Englisch",
#   " auf De & FR & IT", " auf De & FR & IT & EN", ""
lang_long <- c(
  " in German",
  " in French",
  " in Italian",
  " in English",
  " in DE & FR & IT",
  " in DE & FR & IT & EN",
  ""
)

# Colour per actor category.
# German category names, previously assigned and immediately overwritten:
#   Anderes, Bund & Kantone, Medien (De), Medien (Fr), Partei, Medien (It)
colorlist1 <- list(
  `Rest` = "#BFA5A8",
  `Fed. Gov. & Cantons` = "#DD2461",
  `Media (De)` = "#4B8178",
  `Media (Fr)` = "#008E7A",
  `Parties` = "#574144",
  `Media (It)` = "#32BEAE"
)
# Same colours as a named character vector.
fillcolor1 <- unlist(colorlist1)

# Upper limit of the date axis in the figure.
lim_max <- "2020-08-20"
##########################################################################################
# 3) Load curated data
##########################################################################################
# - load data: every source is restricted to dates strictly between
#   2020-02-21 and 2020-08-23 and to rows with la == "de".

# Twitter
tdf <- readRDS("../data/Twitter_data_minified.RDS") %>%
  dplyr::mutate(Datum = as.Date(Datum)) %>%
  dplyr::filter(
    Datum > as.Date("2020-02-21"),
    Datum < as.Date("2020-08-23"),
    la == "de"
  )

# SMD
# NOTE(review): pubDateTime is compared against a Date without prior
# conversion (unlike Datum in the other two sources) - confirm its class.
sdf <- readRDS("../data/SMD_CDT_data_minified.RDS") %>%
  dplyr::filter(
    pubDateTime > as.Date("2020-02-21"),
    pubDateTime < as.Date("2020-08-23"),
    la == "de"
  )

# Facebook
fdf <- readRDS("../data/Facebook_data_minified.RDS") %>%
  dplyr::mutate(Datum = as.Date(Datum)) %>%
  dplyr::filter(
    Datum > as.Date("2020-02-21"),
    Datum < as.Date("2020-08-23"),
    la == "de"
  )


# - harmonise party labels: lower-case the raw names, keep only the listed
#   parties (plus "na" / "nd" / missing) and collapse them to short names.
#   "na" and "nd" end up as a real missing value.
tdf <- tdf %>%
  dplyr::mutate(Party = tolower(Party)) %>%
  dplyr::filter(
    Party %in% c(
      "alternative - die grünen zug",
      "alternative-die grünen kanton zug",
      "bürgerlich-demokratische partei schweiz",
      "christlich-soziale partei",
      "christlichdemokratische volkspartei der schweiz",
      "christlichdemokratische volkspartei oberwallis",
      "fdp.die liberalen",
      "grüne (basels starke alternative)",
      "grüne partei der schweiz",
      "grünliberale partei",
      "na",
      "nd",
      "schweizerische volkspartei",
      "sozialdemokratische partei der schweiz",
      NA
    )
  ) %>%
  dplyr::mutate(
    Party = dplyr::case_when(
      Party %in% c("na", "nd") ~ NA_character_,
      Party %in% c(
        "grüne (basels starke alternative)",
        "grüne partei der schweiz",
        "alternative - die grünen zug",
        "alternative-die grünen kanton zug"
      ) ~ "Grüne",
      Party %in% c("sozialdemokratische partei der schweiz") ~ "SP",
      Party %in% c("schweizerische volkspartei") ~ "SVP",
      Party %in% c("fdp.die liberalen") ~ "FDP",
      Party %in% c(
        "christdemokratische volkspartei der schweiz",
        "christlichdemokratische volkspartei der schweiz",
        "christlich-soziale partei",
        "christlichdemokratische volkspartei oberwallis",
        "christlichsoziale volkspartei oberwallis"
      ) ~ "CVP",
      Party %in% c("grünliberale partei") ~ "GLP",
      Party %in% c("bürgerlich-demokratische partei schweiz") ~ "BDP",
      # anything else (including already-missing values) is passed through
      TRUE ~ Party
    )
  )
# - remove tweets from actors we are not interested in, then derive the
#   coarse actor group `Akteur_Art` from the actor type:
tdf <- tdf %>%
  dplyr::filter(!Akteur.Typ %in% c("Institute", "Gericht", "NA")) %>%
  # a party label is only accepted on actors of type "Party" or "Person";
  # every other actor type is kept only when Party is missing
  dplyr::filter(Akteur.Typ %in% c("Party", "Person") | is.na(Party)) %>%
  dplyr::mutate(
    Akteur.Typ = as.character(Akteur.Typ),
    # actors without a type are labelled "ND"
    Akteur.Typ = ifelse(is.na(Akteur.Typ), "ND", Akteur.Typ)
  ) %>%
  dplyr::mutate(
    Akteur_Art = dplyr::case_when(
      Akteur.Typ == "Party" ~ "Party",
      Akteur.Typ == "Media" ~ "Media",
      Akteur.Typ == "Person" ~ "Politican",
      Akteur.Typ %in% c("Administration", "Departement", "Bundesamt") ~ "Gov",
      Akteur.Typ %in% c("Organisation", "Komitee") ~ "Org",
      Akteur.Typ == "ND" ~ "SnowBallers",
      # any remaining type gets the literal string "NA"
      TRUE ~ "NA"
    )
  )

# - remove all retweets from tweets (switch: set to FALSE to keep them)
retweet_out <- TRUE
if (isTRUE(retweet_out)) {
  tdf <- dplyr::filter(tdf, Is_retweet != TRUE)
  unique(tdf$Is_retweet) # - check if all is in order
}

gc()

# - factorize topic: the same level order is used for all three data sets
topic_levels <- c("Covid19", "Masks", "App", "App & Masks", "Anderes")
tdf$topic <- factor(tdf$topic, levels = topic_levels)
sdf$topic <- factor(sdf$topic, levels = topic_levels)
fdf$topic <- factor(fdf$topic, levels = topic_levels)

# Nice Descriptive numbers:
# ... number of distinct users per actor group
tdf %>%
  group_by(Akteur_Art, User_id) %>%
  summarise(n = n()) %>%
  group_by(Akteur_Art) %>%
  summarise(n = n())
# ... number of tweets per actor group
tdf %>%
  group_by(Akteur_Art) %>%
  summarise(n = n())
##########################################################################################
# 4) Plots and Tables 
##########################################################################################
# Twitter Data...
#-----------------------------------------------------------------------------------------
# Per actor group and week: tweets on "Masks" and on "App" as a percentage of
# the tweets on "Covid19". Shares that are not finite (e.g. no Covid19 tweets
# in that week) or that exceed 100 are set to 0.
timeplot_df <- tdf %>%
  dplyr::filter(la %in% c("de")) %>%
  dplyr::mutate(Datum = as.Date(Datum)) %>%
  dplyr::filter(Datum > as.Date("2020-02-21")) %>%
  dplyr::mutate(
    day = lubridate::date(as.Date(Datum)),
    week = lubridate::date(cut(day, "week"))
  ) %>%
  # weekly counts per actor group and topic
  dplyr::group_by(Akteur_Art, week, topic) %>%
  dplyr::summarise(count = dplyr::n()) %>%
  dplyr::mutate(week = as.Date(week)) %>%
  dplyr::ungroup() %>%
  # add the missing week / topic / actor-group combinations with count 0
  tidyr::complete(
    week = seq.Date(as.Date("2020-02-24"), max(week), by = "week"),
    topic,
    Akteur_Art,
    fill = list(count = 0)
  ) %>%
  # one column per topic
  tidyr::pivot_wider(
    names_from = topic,
    values_from = count,
    values_fill = list(count = 0)
  ) %>%
  dplyr::select(-c("Anderes", "App & Masks")) %>%
  dplyr::mutate(
    freq_Masks = (Masks / Covid19) * 100,
    freq_App = (App / Covid19) * 100
  ) %>%
  dplyr::select(-c(Masks, App, Covid19)) %>%
  dplyr::mutate(
    freq_Masks = ifelse(is.finite(freq_Masks) & freq_Masks <= 100, freq_Masks, 0),
    freq_App = ifelse(is.finite(freq_App) & freq_App <= 100, freq_App, 0)
  ) %>%
  # back to long format: columns `freq` and `topic`
  tidyr::pivot_longer(
    cols = c(freq_Masks, freq_App),
    names_to = c(".value", "topic"),
    names_sep = "_",
    values_drop_na = FALSE
  ) %>%
  dplyr::mutate(topic = factor(topic, levels = c("Masks", "App"))) %>%
  # Filter Politicians and Parties (and the "SnowBallers" group)
  dplyr::filter(Akteur_Art %in% c("Party", "Politican", "SnowBallers")) %>%
  dplyr::filter(topic %in% c("Masks", "App"))

# SMD Data...
#-----------------------------------------------------------------------------------------
# Per language and week: items on "Masks" and on "App" as a percentage of the
# items on "Covid19". Shares that are not finite or that exceed 100 are set
# to 0. The result is labelled as actor group "Media".
timeplot_dt <- sdf %>%
  dplyr::filter(la %in% c("de", "fr", "it")) %>%
  dplyr::mutate(pubDateTime = as.Date(pubDateTime)) %>%
  dplyr::filter(pubDateTime > as.Date("2020-02-21")) %>%
  dplyr::mutate(
    day = lubridate::date(ymd(pubDateTime)),
    week = lubridate::date(as.Date(cut(day, "week")))
  ) %>%
  # weekly counts per language and topic
  dplyr::group_by(la, week, topic) %>%
  dplyr::summarise(count = dplyr::n()) %>%
  dplyr::mutate(week = as.Date(week)) %>%
  dplyr::ungroup() %>%
  # add the missing week / language / topic combinations with count 0
  tidyr::complete(
    week = seq.Date(as.Date("2020-02-24"), max(week), by = "week"),
    nesting(la),
    topic,
    fill = list(freq = 0, count = 0)
  ) %>%
  # one column per topic
  tidyr::pivot_wider(
    names_from = topic,
    values_from = count,
    values_fill = list(count = 0)
  ) %>%
  dplyr::select(-c("Anderes", "App & Masks")) %>%
  dplyr::mutate(
    freq_Masks = (Masks / Covid19) * 100,
    freq_App = (App / Covid19) * 100
  ) %>%
  dplyr::select(-c(Masks, App, Covid19)) %>%
  dplyr::mutate(
    freq_Masks = ifelse(is.finite(freq_Masks) & freq_Masks <= 100, freq_Masks, 0),
    freq_App = ifelse(is.finite(freq_App) & freq_App <= 100, freq_App, 0)
  ) %>%
  # back to long format: columns `freq` and `topic`
  tidyr::pivot_longer(
    cols = c(freq_Masks, freq_App),
    names_to = c(".value", "topic"),
    names_sep = "_",
    values_drop_na = FALSE
  ) %>%
  dplyr::mutate(
    topic = factor(topic, levels = c("Masks", "App")),
    Akteur_Art = "Media"
  ) %>%
  dplyr::select(-la) %>%
  # Filter Topic
  dplyr::filter(topic %in% c("Masks", "App"))

# Combine Data
#-----------------------------------------------------------------------------------------
# Stack the Twitter rows and the SMD rows into one table.
timeplot_df <- dplyr::bind_rows(timeplot_df, timeplot_dt)

#  Mask and App as Share of all Covid Messages...
#-----------------------------------------------------------------------------------------
# One row per annotated event: label text, event date and the topic (panel)
# the annotation belongs to.
annotate2 <- data.frame(
  label_event = c(
    "Introduction of mandatory masks in\npublic transport and shopping centers",
    "Introduction of the SwissCovid App"
  ),
  date_event = as.Date(c("2020-07-06", "2020-06-25")),
  topic = factor(
    c("Face Masks", "Covid App"),
    levels = c("Face Masks", "Covid App")
  ),
  # keep the label column as character regardless of the R version's default
  stringsAsFactors = FALSE
)

# Rename Masks and App to the labels shown in the figure; any other value is
# passed through before the factor levels are applied.
timeplot_df <- timeplot_df %>%
  dplyr::mutate(
    topic = dplyr::recode(
      as.character(topic),
      `Masks` = "Face Masks",
      `App` = "Covid App"
    ),
    topic = factor(topic, levels = c("Face Masks", "Covid App"))
  )

# Figure 2:
# One plot per topic; each plot has one facet row per group of users.
topics_filter <- c("Face Masks", "Covid App")

# Display names of the groups (also used as facet titles).
timeplot_df$Akteur_Art <- recode(timeplot_df$Akteur_Art,
                                 `Party` = "Tweets by parties",
                                 `Politican` = "Tweets by politicians",
                                 `SnowBallers` = "Tweets by attentive public",
                                 `Media` = "Newspapers")

# Line colour per group. The vector is named so that every colour is tied to
# its group explicitly. The previous unnamed values were assigned in the
# alphabetical order of the groups, while the accompanying `labels` were listed
# in a different order, which attached wrong labels to two of the colours.
# The colours below reproduce the colours that were actually drawn.
group_colors <- c(
  "Newspapers" = "#DD2461",
  "Tweets by attentive public" = "#999999",
  "Tweets by parties" = "#E69F00",
  "Tweets by politicians" = "#0072B2"
)

plots <- lapply(topics_filter, function(topic_name) {
  # weekly shares and the event annotation of this topic
  tmp_d <- timeplot_df %>% dplyr::filter(topic == topic_name)
  annotate2_d <- annotate2 %>% dplyr::filter(topic == topic_name)

  ggplot(data = tmp_d) +
    geom_line(aes(x = week, y = freq, color = Akteur_Art), alpha = .7, size = 1.2) +
    # vertical line at the event date, with its label placed to the left
    geom_vline(data = annotate2_d,
               aes(xintercept = ymd(date_event))) +
    geom_text(data = annotate2_d,
              aes(x = ymd(date_event) - 5, y = 90, label = label_event),
              hjust = "right", color = "black", angle = 0, size = 5.0) +
    scale_y_continuous(limits = c(0, 100), expand = c(0, 0)) +
    scale_x_date(date_breaks = "months",
                 limits = c(as.Date("2020-02-24"), as.Date(lim_max)),
                 labels = date_format("%b")) +
    scale_color_manual(values = group_colors) +
    labs(y = 'Share [%]', x = 'Date',
         color = 'Group of Users: ') +
    ddl_theme(panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              panel.grid.major.y = element_line(size = .3, color = "lightgrey")) +
    facet_wrap(~Akteur_Art, ncol = 1, scales = "fixed") +
    # the legend is hidden: the facet titles already name the groups
    theme(legend.position = "none",
          strip.background = element_blank(), strip.text = element_text(color = "black"),
          axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 1, size = 16, color = "black"),
          axis.text.y = element_text(hjust = .5, size = 16, color = "black"),
          strip.text.x = element_text(size = 18, color = "black"),
          axis.title = element_text(size = 16, color = "black"),
          plot.title = element_text(size = 20, color = "black"),
          legend.text = element_text(size = 16, color = "black"),
          plot.margin = unit(c(1, 1.3, .5, .5), "cm"),
          legend.key.size = unit(1.5, "line"),
          axis.line.x = element_line(color = "black", size = .5),
          axis.line.y = element_line(color = "black", size = .5))
})



# Arrange the per-topic plots side by side, using the topic names as panel
# labels.
figure_two <- cowplot::plot_grid(plotlist = plots, align = "hv", nrow = 1, ncol = 2,
                                 label_x = c(.326, .345), labels = topics_filter,
                                 label_size = 20, label_fontface = "bold")

# The composed figure is handed to ggsave() explicitly, so the saved file does
# not depend on which plot was created or displayed last.
ggsave("../images/figure_two.pdf", plot = figure_two,
       width = 14, height = 19, device = cairo_pdf)


